In [348]:
import numpy as np
import matplotlib
import matplotlib.pyplot as pl
%matplotlib inline
import numexpr

import scipy.integrate as si
import scipy.optimize as so
from scipy.misc import logsumexp

Generate some fake data


In [370]:
N = 250
# v = np.random.lognormal(5, sigma=0.005, size=N)
v = np.ones(N)*150
# v = np.random.lognormal(5, sigma=0.25, size=N)
sini = np.sin(np.arccos(np.random.uniform(0.,1.,size=N)))
Q = v*sini
sigma_Q = np.zeros_like(v) + 5. # km/s

Q = Q + np.random.normal(0., sigma_Q)

# mask = Q > 5*sigma_Q # censor the data
mask = numexpr.evaluate("Q > 5*sigma_Q")

In [371]:
print("Fraction of censored objects: {}".format((~mask).sum() / float(N)))
Q = Q[mask]
sigma_Q = sigma_Q[mask]


Fraction of censored objects: 0.016

In [372]:
nbins = 25

fig,axes = pl.subplots(1,2,figsize=(10,5))
# axes[0].hist(v, bins=np.logspace(1,3,nbins));
# axes[0].set_xscale('log')
# axes[0].set_xlabel(r'$\log(v)$')

bins = np.linspace(0,200,nbins)
axes[0].hist(v, bins=bins);
axes[0].set_xlabel('$v$ [km s$^{-1}$]')
axes[0].set_xlim(0, 200)

axes[1].hist(Q, bins=bins);
axes[1].set_xlabel(r'$Q=v\sin i$ [km s$^{-1}$]')
axes[1].set_xlim(0, 200)


Out[372]:
(0, 200)

Checking that I can stil approximate this integral after 3.5 beers...


In [335]:
def log_integrand(x, mu, sigma, i):
    return -0.5*((x-mu*np.sin(i))/sigma)**2 - 0.5*np.log(2*np.pi) - np.log(sigma) + np.log(np.sin(i))

In [336]:
vv,qq,ss = (v_k[0], Q[0], sigma_Q[0])
res = si.quad(integrand, 0., np.pi/2., args=(vv, qq, ss))
np.log(res[0])


Out[336]:
-5.0604631750140374

In [337]:
nnn = 1000
eyes = np.linspace(0.,np.pi/2.,nnn)
logsumexp(log_integrand(qq,vv,ss,eyes)) + np.log(eyes[1]-eyes[0])


Out[337]:
-5.0604631750140232

The real shit


In [338]:
def ln_prior(a_k):
    if np.any(a_k < 0.):
        return -np.inf
    
    return -np.sum(a_k)

def ln_likelihood(a_k, v_k):
    neyes = 100
    eyes = np.linspace(0.,np.pi/2.,neyes)
    
    ll = 0.
    for QQ,ss in zip(Q,sigma_Q):
        L = []
        for v,a in zip(v_k,a_k):
            r = logsumexp(log_integrand(QQ,v,ss,eyes)) + np.log(eyes[1]-eyes[0])
            L.append(r + np.log(a))
        ll += logsumexp(L)
    
    if np.isnan(ll):
        return -np.inf
    
    return ll

def ln_posterior(a_k, v_k):
    return ln_prior(a_k) + ln_likelihood(a_k, v_k)

In [326]:
# a_k = np.ones(3) / 3.
a_k = np.array([0,1,0.])
v_k = np.array([100., 150., 200.])

In [327]:
all_a = np.linspace(0.,1.,101)
all_l = []
for aaa in all_a:
    aa = np.array([1-aaa,aaa,0.])
    all_l.append(ln_posterior(aa, v_k))
all_l = np.array(all_l)

pl.plot(all_a, np.exp(all_l - np.max(all_l)))


Out[327]:
[<matplotlib.lines.Line2D at 0x112b2b9d0>]

Try with emcee


In [339]:
import emcee

In [340]:
ndim = 5
v_k = np.linspace(Q.min(), Q.max(), ndim)
print v_k


[  27.20391184   79.48361403  131.76331622  184.04301841  236.32272061]

In [341]:
nwalkers = 16
sampler = emcee.EnsembleSampler(nwalkers, dim=ndim, lnpostfn=ln_posterior, args=(v_k,))
p0 = np.random.uniform(0.,1.,size=(nwalkers,ndim))
p0 = p0 / p0.sum(axis=1)[:,None]

In [342]:
pos,prob,state = sampler.run_mcmc(p0, 50)
sampler.reset()

In [343]:
pos,prob,state = sampler.run_mcmc(pos, 100)

In [344]:
for j in range(ndim):
    pl.figure()
    for walker in sampler.chain[...,j]:
        pl.plot(walker)
    pl.ylim(0,1)



In [345]:
nbins = 50
bins = np.linspace(0,200,nbins)

fig,axes = pl.subplots(1,2,figsize=(10,5))

axes[0].hist(Q, bins=bins);
axes[0].set_xlabel(r'$Q=v\sin i$ [km s$^{-1}$]')
axes[0].set_xlim(0, 200);

qs = []
for n in range(100):
    a = sampler.flatchain[np.random.randint(sampler.flatchain.shape[0])]
    sini = np.sin(np.arccos(np.random.uniform(0.,1.)))
    ix = np.random.choice(range(len(v_k)), p=a)
    qs.append(v_k[ix]*sini)

axes[1].hist(qs, bins=bins);
axes[1].set_xlim(0, 200);



In [375]:
from astropy.stats import median_absolute_deviation

In [378]:
bad_walkers = sampler.acceptance_fraction < 0.5
pos[bad_walkers] = np.random.normal(np.median(pos[~bad_walkers]), median_absolute_deviation(pos[~bad_walkers]))

In [346]:
nbins = 25
bins = np.linspace(0,200,nbins)

fig,axes = pl.subplots(1,2,figsize=(10,5),sharex=True)

axes[0].hist(v, bins=bins);
axes[0].set_xlabel(r'$v$ [km s$^{-1}$]')

vs = np.random.choice(v_k, p=a, size=10000)
axes[1].hist(vs, bins=bins);
axes[1].set_xlim(v_k.min(), v_k.max());